library(Seurat)
## Loading required package: ggplot2
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Loading required package: Matrix
library(scmap)
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:Matrix':
##
## colMeans, colSums, rowMeans, rowSums, which
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:Matrix':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
##
## apply
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:matrixStats':
##
## count
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Data loading and inspection of the metadata.
load('/data/pub-others/tabula_muris/figshare/180126-facs/maca.seurat_obj.facs.figshare_180126.RData')
head(seurat_obj@meta.data)
sce_maca <- as.SingleCellExperiment(seurat_obj)
all10x <- readRDS('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/10x-180504')
sce_10x <- as.SingleCellExperiment(all10x)
#convert maca gene names to uppercase to match 10x gene names
rowData(sce_maca)['feature_symbol'] <- unlist(lapply(rowData(sce_maca)$gene, function(x){return(toupper(x))}))
rowData(sce_10x)['feature_symbol'] <- rowData(sce_10x)$gene
counts(sce_10x) <- as.matrix(counts(sce_10x))
logcounts(sce_10x) <- as.matrix(logcounts(sce_10x))
counts(sce_maca) <- as.matrix(counts(sce_maca))
logcounts(sce_maca) <- as.matrix(logcounts(sce_maca))
sce_maca <- selectFeatures(sce_maca, suppress_plot = FALSE)
All the different cell types.
as.data.frame(unique(seurat_obj@meta.data$tissue_cell_type))
Setting the right column for clustering.
sce_maca <- indexCluster(sce_maca, cluster_col = 'tissue_cell_type')
Predicting cell types in our dataset.
scmapCluster_results <- scmapCluster(
projection = sce_10x,
index_list = list(
sce_maca = metadata(sce_maca)$scmap_cluster_index
),
threshold=0.5 #default=0.7
)
## Warning in setFeatures(projection, rownames(index)): Features
## 1190002H23RIK, 2210415F13RIK, 4632428N05RIK, 8430408G22RIK, ADH1, C1QA,
## C1QB, C1QC, C1RA, C330006A16RIK, CAR2, CCL6, CD24A, CD53, CKMT1, CXCR7,
## CYP4B1, ERCC-00009, ERCC-00042, ERCC-00092, ERCC-00108, ERCC-00111,
## ERCC-00116, FCGR2B, FCGR3, FCRLS, FTL1, GIMAP6, GPI1, GPIHBP1, GPR34, H2-
## AA, H2-AB1, H2-D1, H2-DMA, H2-DMB2, H2-EB1, H2-K1, HMGCS2, IL11RA1, LASS2,
## LGALS7, LRRC33, LY6A, LY6C1, LY86, LYZ2, MS4A1, MT1, MT2, P2RY13, PGLYRP1,
## PKM2, SERPINB1A, SFPI1, SIGLECH, TPRGL, TRF, TUBB5 are not present in the
## 'SCESet' object and therefore were not set.
as.data.frame(table(scmapCluster_results$scmap_cluster_labs))
A lot of the cells got labeled unassigned (22,101 cells from a total of 56,371). This is much less when only using the old MACA fat dataset as reference (then, only 8,110 cells got labeled unassigned). This could be because there are multiple celltypes in the dataset that are likely the same (for example, adipose mesenchymal stem cells, bladder mesenchymal stem cells and thymus mesenchymal stem cells are all mesenchymal stem cells). Scmap requires that for every cell in our dataset to be assigned a certain celltype, the 3 nearest neighbors from the reference dataset should have that celltype. So say we have some mesenchymal cells in our data, then a lot of our cells will be unassigned because the nearest neighbors will be both bladder mesenchymal, thymus mesenchymal and adipose mesenchymal.
Also interesting to mention is that only 1 cell has an annotation from fat (Fat_smooth muscle cell).
Sankey diagram of how the annotations map to the clusters in our data (cluster 12 = mixture cluster):
plot(
getSankey(
colData(sce_10x)$res.0.5,
scmapCluster_results$scmap_cluster_labs[,"sce_maca"],
plot_height = 400
)
)
## starting httpd help server ... done
#Link: http://yggdrasil:7000/custom/googleVis/SankeyID4d53a14fdfb.html
The annotations for the mixture cluster (total of 1,139 cells, of which 376 are unassigned). Most cell types are only assigned for a few cells, but two celltypes have a higher number of assignments:
Mamary basal cell (333) Marrow hematopoietic stem cell (208)
as.data.frame(table(scmapCluster_results$scmap_cluster_labs[which(colData(sce_10x)$res.0.5 %in% 12),'sce_maca']))
Labeling in the tSNE.
#To prevent the plot being not visible because of too many labels.
predicted_labels_all <- as.data.frame(
row.names=rownames(sce_10x@colData),
x=as.vector(scmapCluster_results$scmap_cluster_labs))
names(predicted_labels_all) <- 'predicted_labels'
all10x <- AddMetaData(all10x, metadata=predicted_labels_all, col.name='predicted_labels')
TSNEPlot(all10x, group.by='predicted_labels', pt.size=1, do.label=T, label.size=6)
TSNEPlot(all10x, group.by='sample_name', pt.size=0.1, do.label=T)
Cell types in the fat dataset.
seurat_obj@meta.data %>% filter(tissue=="Fat") %>% distinct(tissue_cell_type)
Subsetting and preparing the data.
maca_fat <- SubsetData(SetAllIdent(seurat_obj, id='tissue'), ident.use="Fat")
sce_maca_fat <- as.SingleCellExperiment(maca_fat)
rowData(sce_maca_fat)['feature_symbol'] <- unlist(lapply(rowData(sce_maca_fat)$gene, function(x){return(toupper(x))}))
counts(sce_maca_fat) <- as.matrix(counts(sce_maca_fat))
logcounts(sce_maca_fat) <- as.matrix(logcounts(sce_maca_fat))
sce_maca_fat <- selectFeatures(sce_maca_fat, suppress_plot = FALSE)
Setting the right column for clustering.
sce_maca_fat <- indexCluster(sce_maca_fat, cluster_col = 'cell_ontology_class')
Predicting cell types in our dataset.
scmapCluster_results_fat <- scmapCluster(
projection = sce_10x,
index_list = list(
sce_maca_fat = metadata(sce_maca_fat)$scmap_cluster_index
),
threshold=0.5 #default=0.7
)
## Warning in setFeatures(projection, rownames(index)): Features
## 1190002H23RIK, 8430408G22RIK, ADH1, AW112010, C1RA, C4B, CAR4, CCL6,
## CCL9, CCR2, CD2, CD24A, CD48, CD53, CXCR7, CYB5, CYBB, CYP4B1, CYP4F18,
## ERCC-00009, ERCC-00108, F13A1, FCGR2B, FCGR3, GIMAP3, GIMAP6, GM11428,
## GPIHBP1, H2-AA, H2-AB1, H2-D1, H2-DMA, H2-DMB1, H2-DMB2, H2-EB1, H2-K1, H2-
## OB, H2-Q6, HMGCS2, IFI205, IFI27L2A, IL11RA1, LILRB4, LRRC33, LY6A, LY6C1,
## LY86, LYZ2, MGL2, MMP23, MRC1, MS4A1, MS4A4B, MS4A4C, MS4A4D, MS4A6B,
## MS4A6C, MT1, NEURL3, PECAM1, PGCP, RETNLA, SERPINB6A, SFPI1, SLFN2, SPNB2,
## TPRGL, TRF are not present in the 'SCESet' object and therefore were not
## set.
as.data.frame(table(scmapCluster_results_fat$scmap_cluster_labs))
Now less cells are unassigned. Are the mesenchymal stem cell of adipose the multipotent progenitors from the old MACA dataset? (similar nr of annotations)
Predictions in cluster 12:
as.data.frame(table(scmapCluster_results_fat$scmap_cluster_labs[which(colData(sce_10x)$res.0.5 %in% 12), 'sce_maca_fat']))
Interestingly, a lot of epithelial cell predictions and not that much mesenchymal stem cell predictions.
Sankey diagram:
plot(
getSankey(
colData(sce_10x)$res.0.5,
scmapCluster_results_fat$scmap_cluster_labs[,"sce_maca_fat"],
plot_height = 400
)
)
#Link: http://yggdrasil:7000/custom/googleVis/SankeyID4d539a2079.html
predicted_labels_fat <- as.data.frame(
row.names=rownames(sce_10x@colData),
x=as.vector(scmapCluster_results_fat$scmap_cluster_labs))
names(predicted_labels_fat) <- 'predicted_labels_fat'
all10x <- AddMetaData(all10x, metadata=predicted_labels_fat, col.name='predicted_labels_fat')
TSNEPlot(all10x, group.by='predicted_labels_fat', pt.size=0.1, do.label=T)
TSNEPlot(all10x, group.by='sample_name', pt.size=0.1, do.label=T)